{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# XML Testing\n",
    "\n",
    "Author @ysbecca"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "from xml.dom import minidom\n",
    "import numpy as np\n",
    "import openslide\n",
    "from openslide import open_slide  \n",
    "from openslide.deepzoom import DeepZoomGenerator\n",
    "from glob import glob"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 59,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "path = 'wsi_data/A02.xml'"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 64,
   "metadata": {},
   "outputs": [],
   "source": [
    "annotation_doc = minidom.parse(path)\n",
    "# Original author's note: the first region marked is always the tumour delineation.\n",
    "regions, region_labels = [], []\n",
    "for region_node in annotation_doc.getElementsByTagName(\"Region\"):\n",
    "    # Label source: the first nested <Attribute> element if there is one, else the region's own Text.\n",
    "    attribute_nodes = region_node.getElementsByTagName(\"Attribute\")\n",
    "    if attribute_nodes:\n",
    "        label = attribute_nodes[0].attributes['Value'].value\n",
    "    else:\n",
    "        label = region_node.getAttribute('Text')\n",
    "    region_labels.append(label)\n",
    "\n",
    "    # One [x, y] row per <Vertex>, i.e. [x1, y1], [x2, y2], ...\n",
    "    vertex_nodes = region_node.getElementsByTagName(\"Vertex\")\n",
    "    xy_pairs = [(float(node.attributes['X'].value), float(node.attributes['Y'].value))\n",
    "                for node in vertex_nodes]\n",
    "    # reshape keeps the (0, 2) shape for a region without any vertices.\n",
    "    regions.append(np.array(xy_pairs, dtype=float).reshape(-1, 2))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 65,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "(17,)\n",
      "(17,)\n"
     ]
    }
   ],
   "source": [
    "# len() instead of np.shape(): the polygons have different vertex counts, and recent NumPy\n",
    "# releases raise ValueError when asked to build an array from such a ragged list.\n",
    "# The one-element tuples keep the printed form identical to a 1-D shape, e.g. (17,).\n",
    "print((len(regions),))\n",
    "print((len(region_labels),))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 66,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "['Benign',\n",
       " 'Benign',\n",
       " 'Benign',\n",
       " 'Benign',\n",
       " 'Benign',\n",
       " 'Carcinoma in situ',\n",
       " 'Carcinoma in situ',\n",
       " 'Carcinoma in situ',\n",
       " 'Carcinoma in situ',\n",
       " 'Carcinoma in situ',\n",
       " 'Carcinoma in situ',\n",
       " 'Benign',\n",
       " 'Carcinoma in situ',\n",
       " 'Carcinoma in situ',\n",
       " 'Carcinoma invasive',\n",
       " 'Carcinoma invasive',\n",
       " 'Carcinoma invasive']"
      ]
     },
     "execution_count": 66,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "region_labels"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Determine whether a point is within a polygon given its vertices."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 37,
   "metadata": {},
   "outputs": [],
   "source": [
    "from shapely.geometry import Polygon, Point"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 49,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "# Integer class for every annotation label. The invasive class is listed under both spellings:\n",
    "# the labels read from the annotation XML in this notebook use 'Carcinoma invasive'.\n",
    "label_map = {'Normal': 0,\n",
    "             'Benign': 1,\n",
    "             'Carcinoma in situ': 2,\n",
    "             'Invasive carcinoma': 3,\n",
    "             'Carcinoma invasive': 3,\n",
    "            }\n",
    "\n",
    "\n",
    "def generate_label(regions, region_labels, point):\n",
    "    \"\"\"Return the integer class of the first annotated region that contains `point`.\n",
    "\n",
    "    regions       -- sequence of polygon vertex arrays, one [x, y] row per vertex\n",
    "    region_labels -- label string for each entry of `regions`; must be a key of `label_map`\n",
    "    point         -- [x, y] position to classify, in the same coordinate frame as `regions`\n",
    "\n",
    "    Returns label_map['Normal'] when no region contains the point.\n",
    "    Raises KeyError when the containing region carries a label missing from `label_map`.\n",
    "    \"\"\"\n",
    "    location = Point(point[0], point[1])  # built once, reused for every polygon\n",
    "    for vertices, region_label in zip(regions, region_labels):\n",
    "        if len(vertices) < 3:\n",
    "            # Fewer than three vertices cannot form a polygon; skip instead of failing in shapely.\n",
    "            continue\n",
    "        if Polygon(vertices).contains(location):\n",
    "            return label_map[region_label]\n",
    "    return label_map['Normal']"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 42,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "1"
      ]
     },
     "execution_count": 42,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "generate_label(regions, region_labels, [7500, 21600])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 43,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "# Patch edge length in pixels, and the overlap term that is multiplied by patch_size / 2\n",
    "# to obtain the per-side overlap in pixels.\n",
    "patch_size, percent_overlap = 256, 0\n",
    "\n",
    "# The slide image and its annotation XML are read from the same directory.\n",
    "file_dir = xml_dir = \"wsi_data/\"\n",
    "file_name, xml_file = \"A01.svs\", \"A01.xml\"\n",
    "\n",
    "# Deep Zoom pyramid level the tiles are taken from.\n",
    "level = 12"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 44,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Deep Zoom adds `overlap` extra pixels on the interior edges of a tile, so the tile size is\n",
    "# reduced by twice that amount to keep interior tiles at patch_size x patch_size pixels.\n",
    "overlap = int(patch_size*percent_overlap / 2.0)\n",
    "tile_size = patch_size - overlap*2\n",
    "\n",
    "slide = open_slide(file_dir + file_name)\n",
    "tiles = DeepZoomGenerator(slide, tile_size=tile_size, overlap=overlap, limit_bounds=False)\n",
    "\n",
    "# Stop on a bad level: carrying on would only fail on the lookup below with a bare IndexError\n",
    "# (or, for a negative level, silently pick a level counted from the end).\n",
    "if not 0 <= level < tiles.level_count:\n",
    "    raise ValueError(\"Error: requested level does not exist. Slide level count: \" + str(tiles.level_count))\n",
    "\n",
    "# Number of tile columns and rows at the requested Deep Zoom level.\n",
    "x_tiles, y_tiles = tiles.level_tiles[level]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 45,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "16\n",
      "11\n"
     ]
    }
   ],
   "source": [
    "# Tile columns, then tile rows, one per line.\n",
    "print(x_tiles, y_tiles, sep=\"\\n\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 46,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(20480, 8192)"
      ]
     },
     "execution_count": 46,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "tiles.get_tile_coordinates(level, (5, 2))[0]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 51,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Walk the tile grid row by row, keeping full-size patches, their grid position and their label.\n",
    "# NOTE(review): `regions` / `region_labels` come from the XML parsed at `path`, not from\n",
    "# xml_dir + xml_file -- confirm that they describe the slide opened above.\n",
    "patches, coords, labels = [], [], []\n",
    "count = 0\n",
    "for y in range(y_tiles):\n",
    "    for x in range(x_tiles):\n",
    "        # Builtin int replaces the np.int alias, which recent NumPy releases no longer provide.\n",
    "        new_tile = np.array(tiles.get_tile(level, (x, y)), dtype=int)\n",
    "        # OpenSlide calculates overlap in such a way that sometimes depending on the dimensions, edge\n",
    "        # patches are smaller than the others. We will ignore such patches.\n",
    "        if np.shape(new_tile) != (patch_size, patch_size, 3):\n",
    "            continue\n",
    "        patches.append(new_tile)\n",
    "        coords.append(np.array([x, y]))\n",
    "        count += 1\n",
    "\n",
    "        # Calculate the patch label based on centre point.\n",
    "        if xml_file:\n",
    "            # get_tile_coordinates returns the read_region() arguments for the tile: its level-0\n",
    "            # origin, the slide level it is read from, and its size in pixels at that slide level.\n",
    "            (left, top), slide_level, (width, height) = tiles.get_tile_coordinates(level, (x, y))\n",
    "            # Scale half the tile size back to level-0 units to move from the origin to the centre.\n",
    "            downsample = slide.level_downsamples[slide_level]\n",
    "            centre = (left + width*downsample / 2.0, top + height*downsample / 2.0)\n",
    "            labels.append(generate_label(regions, region_labels, centre))\n",
    "\n",
    "# image_ids = [im_id]*count"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 52,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "(150, 256, 256, 3)\n",
      "(150, 2)\n",
      "(150,)\n"
     ]
    }
   ],
   "source": [
    "# Shapes of the collected patches, their grid coordinates and their labels, in that order.\n",
    "for collected in (patches, coords, labels):\n",
    "    print(np.shape(collected))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 54,
   "metadata": {
    "scrolled": true
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "29"
      ]
     },
     "execution_count": 54,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "np.count_nonzero(labels)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 56,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "name = 'tester.svs'"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 58,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "'tester'"
      ]
     },
     "execution_count": 58,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "name[:-4]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.1"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
